# Read the SPSS .sav export and re-save it as CSV so that the column-name
# translation step that follows can load it.
path <- "./../../data/PPD_hairCortisol_PlosOne_.sav"
dataset <- read_sav(path) # NOTE(review): is the SPSS meta data dropped here? confirm
write.csv(dataset, file = "./../../data/data_span.csv")

# ---- The lines from here down to `data.to_csv(...)` are Python, not R ----
import googletrans as gt
import csv

import pandas as pd

# Translate every column name of the Spanish data set to English, log each
# (original, translated) pair to a CSV file, and save the renamed data set.
data = pd.read_csv('./../../data/data_span.csv', encoding="ISO-8859-1")
trans = gt.Translator()

# old name -> translated name; applied in a single rename after the loop so a
# translated name can never be picked up and renamed again by a later step.
translated_names = {}

with open("./../../data/data_programatic_translation.csv", 'w') as f:
    f.write('original, translated\n')
    # csv.writer quotes names that contain commas or quotes, which plain string
    # concatenation would silently turn into extra/broken columns.
    # lineterminator='\n' keeps the same line endings the original f.write()
    # calls produced.
    writer = csv.writer(f, lineterminator='\n')
    for old_col in data.columns.values:
        new_col = trans.translate(old_col, src='spanish', dest='en').text
        writer.writerow([old_col, new_col])
        translated_names[old_col] = new_col

data.rename(columns=translated_names, inplace=True)
data.to_csv("./../../data/data_eng.csv")

# Paper Variables
# Load the English-named data set and drop incomplete rows, reporting how many
# rows were removed.
df <- read.csv("./../../data/data_eng.csv", header = TRUE)
# df %>% variable.names()
# THIS IS WHERE WE CAN DROP UNUSED VARIABLES, MUTATE NAMES AND ADD META DATA
# DESCRIBING VARIABLES (variable attribute data)
# There are 29 rows of all-NA variables that were brought in from empty SPSS rows.
# NOTE: na.omit() removes every row containing at least one NA, not only rows
# that are entirely NA.
before <- nrow(df)
df <- na.omit(df)
after <- nrow(df)
# before - after, so the number of dropped rows is reported as a positive count
print(paste("rows dropped:", before - after))
## [1] "rows dropped: 29"
# ---------- DEMOGRAPHIC / PREGNANCY VARIABLES
# Copy each source column to a snake_case name (the source columns are left in
# place) and then attach a "Description" attribute to every new column.
df <- df %>%
  mutate(
    age = Age,
    nationality = NationalityDicotomica,
    # Marital_Status = ,        # TODO: can't find the source column for this one
    employed = EmploymentSituationDico,
    occupation = Profession,
    education_level = Level_Studies,
    sport = Sport,
    pet = PetDicotomica,
    dyed_hair = Appearance,
    first_pregnancy = FirstPregnancy,
    wanted_pregnancy = WantedPregnancyDico,
    pregnancy_method = PregnancyMethodDico,
    previous_miscarriage = PreviousMiscarriagesDico,
    # delivery = ,              # TODO: can't find delivery and labor
    # delivery_antisthesia = ,  # TODO: not sure what this one is
    fetus_sex = SexFetalDico
  )

attr(df$age, "Description") <- "The age of the mother"
# The description belongs on the new `nationality` column (the one carried into
# the tidy data set), not on the source column NationalityDicotomica.
attr(df$nationality, "Description") <- "The patient's nationality or country of origin at the time of pregnancy"
attr(df$employed, "Description") <- "Employment situation; is or is not employed at time of pregnancy"
attr(df$occupation, "Description") <- "If employed, what was the mothers occupation at the time of pregnancy"
attr(df$education_level, "Description") <- "The highest level of education that the mother had received at the time of pregnancy/study"
# NOTE(review): meaning of Sport still to be confirmed (???)
attr(df$sport, "Description") <- "tracking the physical activity of the mother; whether the mother played a sport (during pregnancy) [y/n]"
attr(df$pet, "Description") <- "designation for mothers with pets at time of pregnancy"
# NOTE(review): I THINK Appearance -> dyed hair is right, need to check
attr(df$dyed_hair, "Description") <- "designation for mothers with dyed hair during pregnancy"
attr(df$first_pregnancy, "Description") <- "primiparous; designation for mothers who are having a child for the first time"
attr(df$wanted_pregnancy, "Description") <- "Designation for mothers who desired the pregnancy (planned?)"
# NOTE(review): is this the accepted terminology? ("spontaneous" reads oddly)
attr(df$pregnancy_method, "Description") <- "designation for *spontaneous* method of fertilization and *Fertility Treatment* or artificial insemination"
attr(df$previous_miscarriage, "Description") <- "Designation for mothers who have had a miscarriage prior to the current pregnancy"
# NOTE(review): numeric mapping of the sex codes is unknown (???)
attr(df$fetus_sex, "Description") <- "the sex of the fetus"
# -------- DEPRESSION METRICS
# Copy the depression-related source columns to snake_case names, then attach
# a "Description" attribute to each new column.
df <- df %>%
  mutate(
    postpartum_depression = depreposparto,
    depression_tri1 = DEPRESSION1,
    depression_tri2 = DEPRESSION2,
    depression_tri3 = DEPRESSION3,
    epds = EPDS
  )

# NOTE(review): should this description say more? (### ???? more?)
attr(df$postpartum_depression, "Description") <- "Diagnosis of post-partum depression in the mother"
attr(df$depression_tri1, "Description") <- "antenatal depression during the 1st trimester."
attr(df$depression_tri2, "Description") <- "antenatal depression during the 2nd trimester."
attr(df$depression_tri3, "Description") <- "antenatal depression during the 3rd trimester."
# NOTE(review): measurement name and type still to be filled in
attr(df$epds, "Description") <- "continuous metric of postnatal depression"
# --------- CORTISOL metrics
# Copy the per-trimester cortisol columns to snake_case names and describe them.
# NOTE(review): WHAT IS LNCORTISOL1? There appear to be two cortisol metrics;
# units are unknown - look into the lncortisol variables and decide which one
# we should be using.
df <- df %>%
  mutate(
    cortisol_tri1 = Cortisol1,
    cortisol_tri2 = Cortisol2,
    cortisol_tri3 = Cortisol3
  )

attr(df$cortisol_tri1, "Description") <- "Cortisol levels in mothers during pregnancy during the 1st trimester"
attr(df$cortisol_tri2, "Description") <- "Cortisol levels in mothers during pregnancy during the 2nd trimester"
attr(df$cortisol_tri3, "Description") <- "Cortisol levels in mothers during pregnancy during the 3rd trimester"
# ---------- SELECT
# Keep only the renamed, documented columns (each listed once), in the order
# they should appear in the tidy data set.
df_tidy <- df %>%
  select(
    age, education_level, nationality, postpartum_depression,
    employed, occupation, sport, pet, dyed_hair,
    first_pregnancy, wanted_pregnancy, pregnancy_method,
    previous_miscarriage, fetus_sex, epds,
    depression_tri1, depression_tri2, depression_tri3,
    cortisol_tri1, cortisol_tri2, cortisol_tri3
  )
# glimpse(df_tidy)
# CSV is plain text, so the "Description" attributes are not written to this file.
write.csv(df_tidy, file = './../../data/tidy_data.csv')
# Save the tidy data as .Rdata; this way we can always load it back as is.
save(df_tidy, file = './../../data/tidy_data.Rdata')

# Build the codebook and render it to its own HTML file.
# This has to go before the codebook() call; make sure echo = FALSE is included.
knitr::opts_chunk$set(warning = TRUE, message = TRUE, echo = FALSE)
my_codebook <- codebook(df_tidy)
# my_codebook # (TAKES FOREVER TO KNIT) This will produce a codebook, but we
# need to have this knitted in its own html file and not echo the code chunks...
## Warning in codebook(df_tidy): The variables session, created, ended have
## to be defined for automatic survey repetition detection to work. Set to no
## repetition by default.
## No missings.
# Restore code echoing for the chunks that follow.
knitr::opts_chunk$set(warning = TRUE, message = TRUE, echo = TRUE)
# Given a path, writeLines() opens and closes the file connection itself, so no
# connection is left open if the write fails.
writeLines(my_codebook, "tmp.Rmd")
# Render tmp.Rmd to group7_codebook.html in the current working directory.
render(input = 'tmp.Rmd', output_file = 'group7_codebook.html', output_dir = getwd())
##
##
## processing file: tmp.Rmd
##
|
| | 0%
|
|.................................................................| 100%
## ordinary text without R code
## output file: tmp.knit.md
## "C:/Program Files/RStudio/bin/pandoc/pandoc" +RTS -K512m -RTS tmp.utf8.md --to html4 --from markdown+autolink_bare_uris+ascii_identifiers+tex_math_single_backslash --output pandoc1dec10f878a7.html --smart --email-obfuscation none --self-contained --standalone --section-divs --template "C:\R-3.5.1\library\rmarkdown\rmd\h\default.html" --no-highlight --variable highlightjs=1 --variable "theme:bootstrap" --include-in-header "C:\Users\natha\AppData\Local\Temp\Rtmp69dgLm\rmarkdown-str1dec176f754a.html" --mathjax --variable "mathjax-url:https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"
##
## Output created: group7_codebook.html
[1] codebook package:
Preprint Arslan, R. C. (2018). How to automatically generate rich codebooks from study metadata. doi:10.31234/osf.io/5qc6h
Zenodo Arslan, R. C. (2018). Automatic codebooks from survey metadata (2018). URL https://github.com/rubenarslan/codebook. DOI